{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text Classification with RAG"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating a Knowledge Base Text File"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the knowledge base content\n",
    "knowledge_base = [\n",
    "    \"Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.\",\n",
    "    \"Category 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.\",\n",
    "    \"Category 3 - Billing - Billing discrepancies may result from processing errors or duplicate transactions.\",\n",
    "    \"Category 4 - Account Management - Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.\",\n",
    "    \"Category 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = \"Data/knowledge_base.txt\"\n",
    "\n",
    "# Save the knowledge base to the file\n",
    "with open(file_path, \"w\") as file:\n",
    "    for entry in knowledge_base:\n",
    "        file.write(entry + \"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Using GroqCloud and the Llama 3.1 Model with Hugging Face Embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "load_dotenv()\n",
    "\n",
    "os.environ['GROQ_API_KEY'] = os.getenv(\"GROQ_API_KEY\")\n",
    "os.environ['HF_TOKEN'] = os.getenv(\"HF_TOKEN\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002D3FCD74370>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002D3FCD76D40>, model_name='llama-3.1-8b-instant', groq_api_key=SecretStr('**********'))"
      ]
     },
     "execution_count": 287,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_groq import ChatGroq\n",
    "\n",
    "model = ChatGroq(model=\"llama-3.1-8b-instant\")\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "f:\\College\\Masters\\Summer 2024\\LLM Projects\\Text Classification RAG\\class-rag\\lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "from langchain_huggingface import HuggingFaceEmbeddings\n",
    "\n",
    "embeddings=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating a Retrieval-Augmented Generation Chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "from langchain.chains import create_retrieval_chain\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Data Ingestion and Text Splitting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.\\nCategory 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.\\nCategory 3 - Billing - Billing discrepancies may result from processing errors or duplicate transactions.\\nCategory 4 - Account Management - Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.\\nCategory 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.\\n')]"
      ]
     },
     "execution_count": 290,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loader = TextLoader(file_path)\n",
    "docs = loader.load()\n",
    "docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=200)\n",
    "splits = text_splitter.split_documents(docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Creating a Retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002D3FCD75D80>)"
      ]
     },
     "execution_count": 292,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a Vector Store\n",
    "vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)\n",
    "retriever = vectorstore.as_retriever(k=3) # Using top 3 results\n",
    "retriever"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Check how the similarity search works"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Query From a vector db\n",
    "query = \"The app crashes every time I try to upload a photo\"\n",
    "result = vectorstore.similarity_search(query, k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 307,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.'),\n",
       " Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.'),\n",
       " Document(metadata={'source': 'Data/knowledge_base.txt'}, page_content='Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.')]"
      ]
     },
     "execution_count": 307,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the documents returned by the similarity search in the previous cell.\n",
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Creating a Retrieval Chain Using the Retriever, Prompt, and Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {},
   "outputs": [],
   "source": [
    "system_prompt = (\n",
    "        \"\"\"\n",
    "            ### Instruction\n",
    "            Context:\n",
    "            {context}\n",
    "\n",
    "            Understand the context containing labels and their description and then,\n",
    "\n",
    "            Classify the input text below into one of the following labels based on the context:\n",
    "            Category 1 - Login Issues\n",
    "            Category 2 - App Functionality\n",
    "            Category 3 - Billing \n",
    "            Category 4 - Account Management\n",
    "            Category 5 - Performance Issues\n",
    "\n",
    "            If you dont know the answer or the input is completely irrelevant to the context just say 'I don't know'. \n",
    "            \n",
    "            Just output the label and nothing else. \n",
    "        \"\"\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa_prompt = ChatPromptTemplate.from_messages(\n",
    "        [\n",
    "            (\"system\", system_prompt),\n",
    "            (\"human\", \"{input}\"),\n",
    "        ]\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {},
   "outputs": [],
   "source": [
    "question_answer_chain = create_stuff_documents_chain(model, qa_prompt)\n",
    "rag_chain = create_retrieval_chain(retriever, question_answer_chain)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Trying the Support Tickets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [],
   "source": [
    "support_tickets = [\n",
    "    {\"text\": \"My account login is not working. I've tried resetting my password twice.\"},\n",
    "    {\"text\": \"The app crashes every time I try to upload a photo.\"},\n",
    "    {\"text\": \"I was charged twice for my last subscription payment.\"},\n",
    "    {\"text\": \"I can't find the option to change my profile picture.\"},\n",
    "    {\"text\": \"The video playback is very laggy on my device.\"}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ticket: My account login is not working. I've tried resetting my password twice.\n",
      "Model Response: Category 1 - Login Issues\n",
      "\n",
      "Ticket: The app crashes every time I try to upload a photo.\n",
      "Model Response: Category 2 - App Functionality\n",
      "\n",
      "Ticket: I was charged twice for my last subscription payment.\n",
      "Model Response: Category 3 - Billing\n",
      "\n",
      "Ticket: I can't find the option to change my profile picture.\n",
      "Model Response: Category 4 - Account Management\n",
      "\n",
      "Ticket: The video playback is very laggy on my device.\n",
      "Model Response: Category 5 - Performance Issues\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for ticket in support_tickets:\n",
    "    print(f\"Ticket: {ticket['text']}\")\n",
    "    response = rag_chain.invoke({\"input\": ticket['text']})\n",
    "    print(f\"Model Response: {response['answer']}\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Using Different Support Tickets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Category 5 - Performance Issues'"
      ]
     },
     "execution_count": 300,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"App is running very slowly\"})\n",
    "response['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Category 4 - Account Management'"
      ]
     },
     "execution_count": 301,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"Want to delete my account\"})\n",
    "response['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Category 3 - Billing'"
      ]
     },
     "execution_count": 302,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"Refund not processed\"})\n",
    "response['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Category 2 - App Functionality'"
      ]
     },
     "execution_count": 303,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"The app crashes very often\"})\n",
    "response['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Category 1 - Login Issues'"
      ]
     },
     "execution_count": 304,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"Account access denied\"})\n",
    "response['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"I don't know.\""
      ]
     },
     "execution_count": 305,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = rag_chain.invoke({\"input\": \"How are you?\"})\n",
    "response['answer']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
